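/*
This file builds and caches the lasso datasets for the paper
(lasso_table_data.dta and lasso_table_data_interactions.dta).

It is run from other do-files, so it does not change directory or define the
path globals (dir_data, dir_code); the calling file must set those first.
*/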

// Rebuild switch read by both cache guards in this file: each guard rebuilds its
// dataset when the cached .dta fails to load OR when this global is nonzero.
global overwrite_lasso_dataset 0 // change to 1 to recreate the dataset

/*
 ██████╗ ███████╗███╗   ██╗███████╗██████╗  █████╗ ████████╗███████╗    ██████╗  █████╗ ████████╗ █████╗
██╔════╝ ██╔════╝████╗  ██║██╔════╝██╔══██╗██╔══██╗╚══██╔══╝██╔════╝    ██╔══██╗██╔══██╗╚══██╔══╝██╔══██╗
██║  ███╗█████╗  ██╔██╗ ██║█████╗  ██████╔╝███████║   ██║   █████╗      ██║  ██║███████║   ██║   ███████║
██║   ██║██╔══╝  ██║╚██╗██║██╔══╝  ██╔══██╗██╔══██║   ██║   ██╔══╝      ██║  ██║██╔══██║   ██║   ██╔══██║
╚██████╔╝███████╗██║ ╚████║███████╗██║  ██║██║  ██║   ██║   ███████╗    ██████╔╝██║  ██║   ██║   ██║  ██║
 ╚═════╝ ╚══════╝╚═╝  ╚═══╝╚══════╝╚═╝  ╚═╝╚═╝  ╚═╝   ╚═╝   ╚══════╝    ╚═════╝ ╚═╝  ╚═╝   ╚═╝   ╚═╝  ╚═╝
 */
// Base dataset cache: echo the use command to the log, then try to load the cached file.
noisily display `"use "${dir_data}/lasso_table_data.dta", clear"'
capture use "${dir_data}/lasso_table_data.dta", clear
if _rc | $overwrite_lasso_dataset { // rebuild when the cached file could not be loaded (_rc nonzero) or a rebuild is forced

  // Loads the raw data; everything below relies on variables that do-file leaves in memory.
  do "${dir_code}/1.0_load_data.do"

  /* Just keep the individuals in our sample for the tests. */
  keep if (in_sample_individual == 1)

  // Product of the tested and positive indicators.
  // NOTE(review): presumably both are 0/1 so p_it means tested and positive; confirm.
  gen p_it = tested_yn*cia_pos

  ******************************************************************************
  * SYMPTOMS
  ******************************************************************************
  * SYMPTOMS for LASSO
  // Labels applied to symptoms___1 .. symptoms___10 when they are renamed to csym1 .. csym10 below.
  local name_symptoms___1  "Fever"
  local name_symptoms___2  "New or worsened cough"
  local name_symptoms___3  "New or increased shortness of breath or difficulty breathing"
  local name_symptoms___4  "Chills"
  local name_symptoms___5  "Repeated shaking with chills"
  local name_symptoms___6  "Muscle pain"
  local name_symptoms___7  "Headache"
  local name_symptoms___8  "Sore throat"
  local name_symptoms___9  "New loss of taste or smell"
  local name_symptoms___10 "None of the above"
    scalar csym_num = 10


  // Labels applied to sick_symptoms___K when they are renamed to psymK below.
  local name_sick_symptoms___1 "Fever measured by thermometer"
  local name_sick_symptoms___2 "Felt feverish"
  local name_sick_symptoms___3 "Chills"
  local name_sick_symptoms___4 "Cough"
  local name_sick_symptoms___5 "Sore Throat"
  local name_sick_symptoms___6 "Runny or stuffy nose"
  local name_sick_symptoms___7 "Difficulty breathing"
  local name_sick_symptoms___8 "Muscle pain"
  local name_sick_symptoms___9 "Chest pain"
  local name_sick_symptoms___10 "Abdominal pain"
  local name_sick_symptoms___11 "Nausea or vomiting"
  local name_sick_symptoms___12 "Diarrhea"
  local name_sick_symptoms___13 "Headache"
  local name_sick_symptoms___14 "Fatigue"
  local name_sick_symptoms___15 "Loss of smell or taste"
  local name_sick_symptoms___16 "Other"
  // Only the first 15 are renamed and labelled; label 16 (Other) is defined but never used in this block.
  // NOTE(review): confirm that leaving out item 16 is intended.
    scalar psym_num = 15
  * Symptoms - based on discussions with epidemiologists
  * Loss of sense of smell: anosmia
  // symptom1 comes from sick_symptoms___15, symptom_present1 from symptoms___9.
  gen symptom1 = sick_symptoms___15
  gen nosymptom1 = 1 - symptom1
	label var symptom1 "Loss of smell or taste"
	gen symptom_present1 = symptoms___9
	gen nosymptom_present1 = (symptoms___9 ==0)
	label var symptom_present1 "Loss of smell or taste"

  *All plus fever more than one symptom
  // As coded: 1 only when sick_symptoms___15 equals 1 AND the sum of items 1, 6, 10, 11, 12 exceeds 1.
  // NOTE(review): the label says or while the code requires both conditions; confirm which is meant.
  // NOTE(review): symptom_present2 uses the same sick_symptoms___ expression as symptom2
  //               (symptom_present1 uses symptoms___9 instead); confirm this is intended.
  // NOTE(review): a missing component makes the sum missing, and missing compares larger than any
  //               number in Stata, so the inequality is then true; confirm inputs have no missings.
  gen symptom2 = (1<sick_symptoms___1   + sick_symptoms___6 + sick_symptoms___10 + sick_symptoms___11  + sick_symptoms___12)*(sick_symptoms___15 ==1)
  gen nosymptom2 = 1 - symptom2
  label var symptom2 "More than one of the others or Fever"
	gen symptom_present2 =  (1<sick_symptoms___1    + sick_symptoms___6 + sick_symptoms___10 + sick_symptoms___11  + sick_symptoms___12)*(sick_symptoms___15 ==1)
	gen nosymptom_present2 = (symptom_present2==0)
	label var symptom_present2 "More than one of the others or Fever"

  *All plus fever more than three symptom
  // Same construction with item 2 added to the sum.
  // symptom3 uses threshold 2 (sum of at least 3) and symptom_present3 uses threshold 3 (sum of at least 4).
  // NOTE(review): the two labels read the other way round relative to these thresholds; confirm which
  //               threshold/label pairing is intended before relying on either variable.
  gen symptom3 = (2<sick_symptoms___1  + sick_symptoms___2 + sick_symptoms___6 + sick_symptoms___10 + sick_symptoms___11  + sick_symptoms___12)*(sick_symptoms___15 ==1)
  gen nosymptom3 = 1 - symptom3
  label var symptom3 "More than three of the others or Fever"
	gen symptom_present3 =  (3<sick_symptoms___1  + sick_symptoms___2   + sick_symptoms___6 + sick_symptoms___10 + sick_symptoms___11  + sick_symptoms___12)*(sick_symptoms___15 ==1)
	gen nosymptom_present3 = (symptom_present3==0)
	label var symptom_present3 "Three or more of the others or Fever"

  // Summaries (full sample, then cia_pos == 1 only) of every variable whose name
  // contains symptom and ends in 1, 2 or 3.
  sum *symptom*1 *symptom*2 *symptom*3
  sum *symptom*1 *symptom*2 *symptom*3 if cia_pos == 1

  *symptoms
  // Rename symptoms___K to csymK, label it, and collect the names in the level_sympvar global.
  global level_sympvar
  forvalues k = 1/`=csym_num' {
    rename symptoms___`k' csym`k'
    label variable csym`k' "`name_symptoms___`k''"
    global level_sympvar $level_sympvar csym`k'
  }

  // Same for sick_symptoms___K, which become psymK and are collected in level_past_sympvar.
  global level_past_sympvar
  forvalues k = 1/`=psym_num' {
    rename sick_symptoms___`k' psym`k'
    label variable psym`k' "`name_sick_symptoms___`k''"
    global level_past_sympvar $level_past_sympvar psym`k'
  }

  *other potential variables
  *employment worked_outside known_contact travel commute flu_shot pregnant general_health age sex race ethnicity school_completed naics_industry family social_distance covid_concern social_concern groceries restaurant retail medical walk work family churchother visitors
  *nonbinary variables create binary variables
  // tab, gen() creates one dummy per observed category: dVAR1, dVAR2, ... in sorted order of the codes.
  foreach var in employment known_contact general_health sex race school_completed {
    tab `var' , gen(d`var')
  }
  /*
  Category behind each generated dummy (nK is the K-th dummy, followed by the
  category code and its label), as recorded by the original author.
  NOTE(review): these codes are not checked anywhere in this file; verify against the data.

  employment:
    n1: 1 "Yes, working outside of the house (some days or every day)"
    n2: 2 "Yes, teleworking/working from home every day that I work"
    n3: 3 "Yes, but not working this week or furloughed / on temporary layoff"
    n4: 5 "No, unemployed and looking to go back to work"
    n5: 6 "Not employed and not seeking employment (out of the labor force)"
    n6: 7 "No, retired"
    n7: 8 "No, full time student"

  known_contact:
    n1: 0 "No"
    n2: 1 "Yes"
    n3: 3 "Don't know"

  general_health:
    n1: 1 "Excellent"
    n2: 2 "Very Good"
    n3: 3 "Good"
    n4: 4 "Fair"
    n5: 5 "Poor"

  dsex:
    n1: 1 Male
    n2: 2 Female
    n3: 3 Other

  drace:
    n1: 1 American Indian or Alaska Native
    n2: 2 Asian
    n3: 3 Black
    n4: 4 Multi-racial
    n5: 5 Native Hawaiian or Other Pacific Isla..
    n6: 6 White

  dschool_completed:
    n1: 3 "Grade 11 or below (less than high school/not a high school graduate)"
    n2: 4 "Grade 12 or GED (High school graduate)"
    n3: 5 "College 1 year to 3 years (Some college or technical school)"
    n4: 6 "College 4 years or more (College graduate)"
    n5: 9 "Prefer not to answer"

  */
  *reset global variable
  // Copy each listed variable into nonvar1, nonvar2, ... (labelled with the source variable name),
  // collect the new names in level_nonvar and store the count in the nonvar_num scalar.
  global level_nonvar
  local i = 0
  foreach v of var demployment* worked_outside dknown_contact* dgeneral_health*  dsex* drace* ethnicity dschool_completed*   {
    local ++i
    gen nonvar`i' = `v'
    label variable nonvar`i' "`v'"
    global level_nonvar $level_nonvar nonvar`i'
  }
    scalar nonvar_num = `i'

  // Shrink storage types, then write the cache file that the guard above looks for.
  compress
  save "${dir_data}/lasso_table_data.dta", replace
}


/*
███████╗ ██████╗ █████╗ ██╗      █████╗ ██████╗     ██╗ ██████╗ ██╗      ██████╗ ██████╗  █████╗ ██╗
██╔════╝██╔════╝██╔══██╗██║     ██╔══██╗██╔══██╗   ██╔╝██╔════╝ ██║     ██╔═══██╗██╔══██╗██╔══██╗██║
███████╗██║     ███████║██║     ███████║██████╔╝  ██╔╝ ██║  ███╗██║     ██║   ██║██████╔╝███████║██║
╚════██║██║     ██╔══██║██║     ██╔══██║██╔══██╗ ██╔╝  ██║   ██║██║     ██║   ██║██╔══██╗██╔══██║██║
███████║╚██████╗██║  ██║███████╗██║  ██║██║  ██║██╔╝   ╚██████╔╝███████╗╚██████╔╝██████╔╝██║  ██║███████╗
╚══════╝ ╚═════╝╚═╝  ╚═╝╚══════╝╚═╝  ╚═╝╚═╝  ╚═╝╚═╝     ╚═════╝ ╚══════╝ ╚═════╝ ╚═════╝ ╚═╝  ╚═╝╚══════╝
 */
{ // load level* globals and counts of variables (csym_num, psym_num, nonvar_num scalars)
  // The globals and scalars are only set by the build branch above, so when the
  // dataset came from the cache they are re-derived here from the data in memory:
  // probe PREFIX1, PREFIX2, ... and stop at the first name that is not a variable.
  // Each counter starts at 0 so that a family with no variables reports 0 instead
  // of leaving a stale or undefined scalar behind.

  // Current symptoms: csym1, csym2, ...
  global level_sympvar
  scalar csym_num = 0
  forvalues i = 1/1000 {
    capture confirm variable csym`i'
    if _rc continue, break
    global level_sympvar $level_sympvar csym`i'
    scalar csym_num = `i'
  }
  noisily display "Found {txt}`=scalar(csym_num)'{res} current symptoms: {txt}${level_sympvar}"

  // Past symptoms: psym1, psym2, ...
  global level_past_sympvar
  scalar psym_num = 0
  forvalues i = 1/1000 {
    capture confirm variable psym`i'
    if _rc continue, break
    global level_past_sympvar $level_past_sympvar psym`i'
    scalar psym_num = `i'
  }
  noisily display "Found {txt}`=scalar(psym_num)'{res} past symptoms: {txt}${level_past_sympvar}"

  // Non-symptom covariates: nonvar1, nonvar2, ...
  global level_nonvar
  scalar nonvar_num = 0
  forvalues i = 1/1000 {
    capture confirm variable nonvar`i'
    if _rc continue, break
    global level_nonvar $level_nonvar nonvar`i'
    scalar nonvar_num = `i'
  }
  noisily display "Found {txt}`=scalar(nonvar_num)'{res} non-symptom variables: {txt}${level_nonvar}"
  noi di ""
}


noisily display `"use "${dir_data}/lasso_table_data_interactions.dta", clear"'
capture use "${dir_data}/lasso_table_data_interactions.dta", clear
if _rc | $overwrite_lasso_dataset {
  /*
  Build and cache the interaction dataset.

  Families: c = csym (current symptoms), p = psym (past symptoms), n = nonvar.
  Two-way terms:   cc, cn, pp, pn, nn            named XY_i1_i2
  Three-way terms: ccc, ccn, cnn, ppp, ppn, pnn, nnn   named XYZ_i1_i2_i3
  Within one family the indices are strictly increasing, so each combination is
  created once. Every term is the byte product of its components and is labelled
  with the component labels joined by " * ". A term that takes a single value
  (or has no observations) is dropped immediately.

  Progress output: one bracketed list per outer index; an index printed in the
  error colour and followed by a star marks a term that was dropped.

  Relies on the scalars csym_num, psym_num and nonvar_num set earlier in this file.
  */

  // Always start from the base dataset. When a rebuild is forced and the cached
  // interaction file exists, the capture use above has just loaded that file, and
  // generating the terms on top of it would stop with "variable already defined".
  use "${dir_data}/lasso_table_data.dta", clear

  /* ------------------------------- 2-WAY ------------------------------- */
  quietly { // 2-way
    foreach fam in csym psym nonvar {
      local f = substr("`fam'", 1, 1)     // one-letter family tag used in the variable names
      local nfam = scalar(`fam'_num)      // number of variables in this family
      local nnon = scalar(nonvar_num)

      if "`fam'" == "csym"   noisily display "Creating two-way interaction terms for current symp vars {txt}(cc, cn)"
      if "`fam'" == "psym"   noisily display "Creating two-way interaction terms for past symp vars {txt}(pp,pn)"
      if "`fam'" == "nonvar" noisily display "Creating two-way interaction terms for non-vars {txt}(nn_i1_i2)"

      forvalues i1 = 1/`nfam' {
        noisily display "{res}`i1' * [{txt}", _c

        // same-family pairs (cc, pp, nn); the range is empty when i1 is the last index
        forvalues i2 = `=`i1'+1'/`nfam' {
          local varname "`f'`f'_`i1'_`i2'"
          gen byte `varname' = `fam'`i1' * `fam'`i2'
          label variable `varname' "`: var label `fam'`i1'' * `: var label `fam'`i2''"
          summarize `varname', meanonly   // only r(min) and r(max) are needed
          if r(min) == r(max) {
            drop `varname'
            noisily display "{err}`i2'*{txt},", _c
          }
          else noisily display "`i2',", _c
        }

        // symptom x nonvar pairs (cn, pn); the nonvar family has no such section
        if "`fam'" != "nonvar" {
          noisily display "{res}|| N:{txt}", _c
          forvalues i2 = 1/`nnon' {
            local varname "`f'n_`i1'_`i2'"
            gen byte `varname' = `fam'`i1' * nonvar`i2'
            label variable `varname' "`: var label `fam'`i1'' * `: var label nonvar`i2''"
            summarize `varname', meanonly
            if r(min) == r(max) {
              drop `varname'
              noisily display "{err}`i2'*{txt},", _c
            }
            else noisily display "`i2',", _c
          }
        }
        noisily display "{res}]"
      }
    }
  } // end 2-way

  /* ------------------------------- 3-WAY -------------------------------
     ccc, ccn, cnn, ppp, ppn, pnn, nnn
  */
  quietly { // 3-way
    foreach fam in csym psym nonvar {
      local f = substr("`fam'", 1, 1)
      local nfam = scalar(`fam'_num)
      local nnon = scalar(nonvar_num)

      if "`fam'" == "csym"   noisily display "Creating three-way interaction terms for current symp vars {txt}(ccc, ccn, cnn)"
      if "`fam'" == "psym"   noisily display "Creating three-way interaction terms for past symp vars {txt}(ppp, ppn, pnn)"
      if "`fam'" == "nonvar" noisily display "Creating three-way interaction terms for nonvars {txt}(nnn)"

      forvalues i1 = 1/`nfam' {

        // first two factors from the same family (cc?, pp?, nn?)
        forvalues i2 = `=`i1'+1'/`nfam' {
          if "`fam'" == "nonvar" noisily display "{res}n`i1'*n`i2'*[N:{txt}", _c
          else noisily display "{res}`i1'*`i2'*[{txt}", _c

          // all three from the same family (ccc, ppp, nnn)
          forvalues i3 = `=`i2'+1'/`nfam' {
            local varname "`f'`f'`f'_`i1'_`i2'_`i3'"
            gen byte `varname' = `fam'`i1' * `fam'`i2' * `fam'`i3'
            label variable `varname' "`: var label `fam'`i1'' * `: var label `fam'`i2'' * `: var label `fam'`i3''"
            summarize `varname', meanonly
            if r(min) == r(max) {
              drop `varname'
              noisily display "{err}`i3'*{txt},", _c
            }
            else noisily display "`i3',", _c
          }

          // two symptoms x one nonvar (ccn, ppn)
          if "`fam'" != "nonvar" {
            noisily display "{res}|| N:{txt}", _c
            forvalues i3 = 1/`nnon' {
              local varname "`f'`f'n_`i1'_`i2'_`i3'"
              gen byte `varname' = `fam'`i1' * `fam'`i2' * nonvar`i3'
              label variable `varname' "`: var label `fam'`i1'' * `: var label `fam'`i2'' * `: var label nonvar`i3''"
              summarize `varname', meanonly
              if r(min) == r(max) {
                drop `varname'
                noisily display "{err}`i3'*{txt},", _c
              }
              else noisily display "`i3',", _c
            }
          }
          noisily display "{res}]"
        }

        // one symptom x two nonvars (cnn, pnn)
        if "`fam'" != "nonvar" {
          forvalues i2 = 1/`nnon' {
            noisily display "{res}`i1'*n`i2'*[N:{txt}", _c
            forvalues i3 = `=`i2'+1'/`nnon' {
              local varname "`f'nn_`i1'_`i2'_`i3'"
              gen byte `varname' = `fam'`i1' * nonvar`i2' * nonvar`i3'
              label variable `varname' "`: var label `fam'`i1'' * `: var label nonvar`i2'' * `: var label nonvar`i3''"
              summarize `varname', meanonly
              if r(min) == r(max) {
                drop `varname'
                noisily display "{err}`i3'*{txt},", _c
              }
              else noisily display "`i3',", _c
            }
            noisily display "{res}]"
          }
        }
      }
    }
  } // end 3-way quietly

  save "${dir_data}/lasso_table_data_interactions.dta", replace
} // end cap load lasso_table_data_interactions

  // Short aliases created after either cache path. Under capture, any error from
  // generate (for example, the alias already exists) is silently ignored.
  capture generate DI = cia_pos // Infectious
  capture generate Dtau = tested_yn // Tested
